perm filename FOO[E,ALS] blob
sn#135753 filedate 1974-12-12 generic text, type C, neo UTF8
COMMENT ⊗ VALID 00021 PAGES
C REC PAGE DESCRIPTION
C00001 00001
C00004 00002 BEGIN "MAIN" COMMENT: THIS FILE HOLDS THE COMBINED ROUTINES
C00007 00003 COMMENT: THE DRIVER STARTS HERE
C00009 00004 PROCEDURE GRAB (INTEGER ARRAY SACK REFERENCE INTEGER SCALE)
C00012 00005 RECURSIVE PROCEDURE DELIVER(SAFE INTEGER ARRAY A)
C00015 00006 PROCEDURE SHIFT (SAFE INTEGER ARRAY R, A INTEGER ARG)
C00022 00007 PROCEDURE ADD1 (SAFE INTEGER ARRAY ARG)
C00025 00008 SIMPLE INTEGER PROCEDURE REVERSE( VALUE INTEGER ARG)
C00027 00009 PROCEDURE FFT1 (SAFE INTEGER ARRAY A REFERENCE INTEGER X)
C00032 00010 PROCEDURE PARTA (SAFE INTEGER ARRAY W, U, V)
C00036 00011 PROCEDURE MULT (SAFE INTEGER ARRAY ANSWER, ARG1, ARG2)
C00039 00012 PROCEDURE PARTB(SAFE INTEGER ARRAY W, U, V)
C00043 00013 SIMPLE BOOLEAN PROCEDURE LESS (SAFE INTEGER ARRAY A, B INTEGER SIZE)
C00044 00014 PROCEDURE PARTC (SAFE INTEGER ARRAY W, W1, W2)
C00050 00015 PROCEDURE EQ34 (SAFE INTEGER ARRAY ANSWER, W REFERENCE INTEGER SCALEA
C00056 00016 FOR I ← -2 STEP 1 UNTIL 0 DO STREAM[I] ← NEW
C00057 00017 PROCEDURE MULTIPLY (SAFE INTEGER ARRAY ANSWER, ARG1, ARG2
C00059 00018 PROCEDURE SCALE (INTEGER ARRAY R, A INTEGER RSHIFT)
C00061 00019 PROCEDURE LSUB (SAFE INTEGER ARRAY R,A,B REFERENCE INTEGER SCALER)
C00064 00020 PROCEDURE LADD (SAFE INTEGER ARRAY R,A,B REFERENCE INTEGER SCALER)
C00068 00021 COMMENT: The driver continues here
C00077 ENDMK
C⊗;
BEGIN "MAIN" COMMENT: THIS FILE HOLDS THE COMBINED ROUTINES;
EQUIRE "SYS:BAIL.REL" LOAD_MODULE;
EXTERNAL PROCEDURE STBAIL; REQUIRE STBAIL INITIALIZATION;
EXTERNAL PROCEDURE BAIL;
COMMENT: THESE ARE THE GLOBAL "CU" VARIABLES;
INTEGER OVMASK, GOODMASK, MODULUS, LSTWORD, BPW, SIZE, BIGSIZE, OFFSET,
M, LSTOVMASK, LSTGDMASK, SK, K, SL, L, SIZER, N, P, OMEGA, PSI,
SCALEA, SCALEB, SCALEC, OGDPT;
COMMENT: THIS IS THE MEANING OF THESE GLOBALS:
M: THE M OF THE BOOK.
SK: THE (SMALL) k OF THE BOOK.
K: THE K OF THE BOOK.
SL: THE (SMALL) l OF THE BOOK.
L: THE L OF THE BOOK.
N: L*K. TOTAL NUMBER OF BITS OF RESULT.
P: 2*L + SK.
MODULUS: 2↑M + 1.
BPW: BITS PER COMPLETE WORD. MUST BE SMALLER THAN FULL WORD.
SIZE: M DIV BPW. BIGNUMS ARE [0:SIZE].
SIZER: N DIV BPW.
BIGSIZE: P DIV BPW. USED IN PARTC, EQ34.
LSTWORD: SIZE - 1. COMPLETE WORDS [0:LSTWORD] IN BIGNUMS.
OFFSET: N MOD BPW. BITS IN PARTIAL WORD IN BIGNUMS.
OVMASK: 1 LSH BPW.
GOODMASK: OVMASK - 1.
LSTOVMASK: 1 LSH OFFSET.
LSTGDMASK: LSTOVMASK - 1.
OMEGA: USED IN SHIFT.
PSI: USED IN PARTB.
SCALE: NUMBER OF SIGNIFICANT BITS.
;
INTEGER CARRY; COMMENT: "PE" VARIABLE;
DEFINE SUSPHIM = "'10";
DEFINE SUSPME = "'2";
DEFINE RUNME = "'1";
REQUIRE 1000 NEW_ITEMS;
INTEGER ITEM RITEM;
COMMENT: THE DRIVER STARTS HERE;
INTEGER AY, BE, CE, I, J, DUMMY;
OPEN(0,"TTY",0,2,0,DUMMY,DUMMY,DUMMY);
WHILE TRUE DO
BEGIN COMMENT: YES, VIRGINIA, THERE IS AN INFINITE LOOP;
BAIL;
OUTSTR('12&'15&'15&"l=");
SL ← INTIN(0);
OUTSTR("k=");
SK ← INTIN(0);
K ← 1 LSH SK;
L ← 1 LSH SL;
M ← L+L;
N ← K*L;
P ← 2*L + SK + 1;
MODULUS ← (1 LSH M) + 1;
OUTSTR(" M="&CVS(M)&" K="&CVS(K));
OUTSTR(" BPW=");
BPW ← INTIN(0);
SIZE ← M DIV BPW;
SIZER ← N DIV BPW;
BIGSIZE ← P DIV BPW;
OFFSET ← M MOD BPW;
LSTWORD ← SIZE - 1;
GOODMASK ← (OVMASK ← 1 LSH BPW) - 1;
LSTGDMASK ← (LSTOVMASK ← 1 LSH OFFSET) - 1;
BEGIN COMMENT: NEED TO DYNANICALLY CREATE A, B, C;
SAFE INTEGER ARRAY SIGNW [-2:K+2];
PROCEDURE GRAB (INTEGER ARRAY SACK; REFERENCE INTEGER SCALE);
BEGIN "GRAB" COMMENT: Takes an octal input, breaks it into
BPW bits per chunk, and throws it into SACK[0,I,*]
according to this algorithm: Every L bits, bump I.
Every BPW bits, up J. Or some such.
Assume that SACK has already been zeroed.
SCALE returns the bit number of the most significant bit.
;
INTEGER I, J, NUM, TEMP, BIT, COUNT, INDEX, TOTALBITS, SAVNUM;
STRING CHAR;
I ← 0;
J ← 0;
SCALE ← 0;
TEMP ← 0;
COUNT ← 0;
TOTALBITS ← 0;
CHAR ← INCHRW;
WHILE '60≤CHAR ∧ CHAR≤'71 DO
BEGIN COMMENT: Treat one character each iteration;
SAVNUM ← NUM ← INTSCAN(CHAR,DUMMY);
IF NUM ≥8 THEN OUTSTR("π")
ELSE FOR INDEX ← 1 STEP 1 UNTIL 3 DO
BEGIN COMMENT: Once for each bit;
BIT ← NUM LAND 1;
NUM ← NUM LSH -1;
TEMP ← (TEMP + BIT) ROT -1;
SCALE ← SCALE + 1;
COUNT ← COUNT + 1;
TOTALBITS ← TOTALBITS + 1;
IF COUNT = BPW THEN
BEGIN COMMENT: Finished a word;
SACK[0,I,J] ← TEMP ROT COUNT;
TEMP ← 0;
COUNT ← 0;
J ← J+1;
END;
IF TOTALBITS = L THEN
BEGIN COMMENT: Must bump I;
SACK[0,I,J] ← TEMP ROT COUNT;
TEMP ← 0;
COUNT ← 0;
J ← 0;
I ← I + 1;
TOTALBITS ← 0;
END;
END;
CHAR ← INCHRW;
END;
SACK[0,I,J] ← TEMP ROT COUNT;
SCALE ← SCALE -
(IF SAVNUM = 0 THEN 4 ELSE (IF SAVNUM = 1 THEN 3
ELSE (IF SAVNUM < 4 THEN 2 ELSE 1)));
END "GRAB";
RECURSIVE PROCEDURE DELIVER(SAFE INTEGER ARRAY A);
BEGIN "DELIVR"
COMMENT this is coroutine that pumps out the bits of array A;
INTEGER I, J, TEMP, COUNT, BITS;
FOR I ← 0 STEP 1 UNTIL K-1 DO
BEGIN
COUNT ← J ← 0;
WHILE TRUE DO
BEGIN "COLUMN"
TEMP ← A[0,I,J];
FOR BITS ← 1 STEP 1 UNTIL BPW DO
BEGIN COMMENT: One bit;
DATUM(RITEM) ← TEMP LAND 1;
RESUME(CALLER(MYPROC),RITEM);
TEMP ← TEMP LSH -1;
COUNT ← COUNT + 1;
IF COUNT=L THEN DONE "COLUMN";
END;
J ← J + 1;
END "COLUMN";
J ← 0;
END;
WHILE TRUE DO
BEGIN COMMENT: Keep saying "0" as long as asked;
DATUM(RITEM) ← 0;
RESUME (CALLER(MYPROC),RITEM);
END;
END "DELIVR";
PROCEDURE OUTPUT (STRING MESG; INTEGER ARRAY SPURT);
BEGIN "OUTPUT"
COMMENT: Prints ogdoanal version of SPURT[0,I,J]
where 0 ≤ I ≤ (K/2 -1) and L bits per column are used..
;
ITEM OCTSTREAM;
ITEM RESSTREAM;
INTEGER ITEMVAR X;
RECURSIVE PROCEDURE OCTAL;
COMMENT use for octal output of the final result;
BEGIN "OCTAL"
INTEGER I, J, T;
SPROUT(OCTSTREAM, DELIVER(SPURT), SUSPHIM+RUNME);
OUTSTR('12&'15&MESG&" ");
FOR I ← 0 STEP 1 UNTIL (N DIV 3) DO
BEGIN
T ← 0;
FOR J ← 0 STEP 1 UNTIL 2 DO
T ← T+(DATUM(X←RESUME(OCTSTREAM)) LSH J);
OUTSTR(CVS(T));
END;
END "OCTAL";
SPROUT(RESSTREAM, OCTAL);
TERMINATE (RESSTREAM);
TERMINATE (OCTSTREAM);
END "OUTPUT";
PROCEDURE SHIFT (SAFE INTEGER ARRAY R, A; INTEGER ARG);
BEGIN "SHIFT"
COMMENT This routine multiplies the number held in array A by
2↑ARG mod (2↑M + 1) and stores the result in the array R. The
action of the multiplication is equivalent to shifting A left by
ARG bits and then subtracting the number that has fallen off the
left end. A great deal of trickery is used to avoid using twice
the storage of A in the computation and to write the main loop in
a way that branching is minimized, as is required in order to
take maximum advantage of the processing power of the Illiac.
The subtraction described above is carried out by having two
pointers, BACKP and FORWP, so that the next word of the
difference is generated by subtracting certain of the bits
pointed to by FORWP from certain bits pointed to by BACKP. The
array A is extended with a bounded number of zeros at both ends.
There are three phases in the process: 1) only FORWP advances in
each iteration, 2) both FORWP and BACKP advance, and 3) only
BACKP advances. Note that A must have 2 unused, 0 locations at
both ends;
INTEGER OFFMASK;
INTEGER FORWP, BACKP, INCF, INCB, STARTB, STOPF,
RSHIFTF, RSHIFTB, LSHIFTF, LSHIFTB, TEMP, SIGN;
COMMENT: "PE";
INTEGER I; COMMENT: "CU";
OFFMASK ← (1 LSH OFFSET) - 1;
CARRY ← 0;
ARG ← ARG MOD (2*M); COMMENT: 2↑(2M)=1;
SIGN ← 1;
IF ARG GEQ M THEN
BEGIN COMMENT: PROTERON HYSTERON;
SIGN ← -1;
ARG ← ARG - M;
END;
STARTB ← (ARG DIV BPW) - 1; COMMENT iteration when BACKP
starts advancing;
STOPF ← ((ARG-OFFSET) DIV BPW) +
(IF ((ARG-OFFSET) MOD BPW) = 0 THEN 0 ELSE 1);
COMMENT iteration when FORWP stops advancing;
FORWP ← LSTWORD - ((ARG-OFFSET) DIV BPW);
COMMENT word containing rightmost of the bits to
fall off the left end;
BACKP ← IF ARG < BPW THEN -1 ELSE -2;
COMMENT when ARG < BPW BACKP "has already advanced 1";
LSHIFTF ← (ARG-OFFSET) MOD BPW;
RSHIFTF ← BPW - LSHIFTF;
COMMENT we use the right LSHIFTF bits of the word
pointed to by FORWP, and RSHIFTF bits of the word
pointed to by FORWP+1;
LSHIFTB ← ARG MOD BPW;
RSHIFTB ← BPW- LSHIFTB;
COMMENT analogous to the above;
INCF ← 1; INCB ← 0; COMMENT set up initial stage;
FOR I ← 0 STEP 1 UNTIL LSTWORD DO
BEGIN COMMENT this is the important inner loop that
performs the subtractions. Notice that the only con-
ditionals used are extremely brief.;
TEMP ← OVMASK - CARRY + SIGN*
(((A[BACKP] LSH -RSHIFTB) LOR
((A[BACKP+1] LSH LSHIFTB) LAND GOODMASK))
-
((A[FORWP] LSH -RSHIFTF) LOR
((A[FORWP+1] LSH LSHIFTF) LAND GOODMASK)));
COMMENT bit hacking. OVMASK is used to make sure
that subtraction always yields a positive result;
CARRY ← 1 -(TEMP LSH -BPW);
COMMENT we have a carry if the OVMASK bit was
killed;
R[I] ← TEMP LAND GOODMASK; COMMENT store result;
IF I GEQ STARTB THEN INCB ← 1;
COMMENT start BACKP moving;
BACKP ← BACKP + INCB;
FORWP ← FORWP + INCF;
IF I GEQ STOPF THEN INCF ← 0;
COMMENT stop FORWP;
END; COMMENT end of inner loop;
COMMENT do it one last time for < bpw bits;
TEMP ← OVMASK - CARRY + SIGN*
(((A[BACKP] LSH -RSHIFTB) LOR
((A[BACKP+1] LSH LSHIFTB) LAND GOODMASK))
-
((A[FORWP] LSH -RSHIFTF) LOR
((A[FORWP+1] LSH LSHIFTF) LAND GOODMASK)));
R[SIZE] ← TEMP LAND ((1 LSH OFFSET) - 1);
IF OFFSET NEQ 0 COMMENT o.k. because result of test same
for all P.E.'s.;
THEN CARRY ← IF ((((A[BACKP] LSH -RSHIFTB) LOR
(A[BACKP+1] LSH LSHIFTB)) LAND OFFMASK)
-
(((A[FORWP] LSH -RSHIFTF) LOR
(A[FORWP+1] LSH LSHIFTF)) LAND OFFMASK))
*SIGN GEQ CARRY THEN 0 ELSE 1;
COMMENT pain in the ass!;
IF A[SIZE] LAND LSTOVMASK THEN CARRY ← (1+SIGN) LSH -1;
END "SHIFT";
PROCEDURE ADD1 (SAFE INTEGER ARRAY ARG);
BEGIN "ADD1"
COMMENT: Just adds 1 mod MODULUS to ARG;
INTEGER TEMP, I;
I ← 0;
WHILE CARRY DO
BEGIN COMMENT: Add in the one to next slot;
IF I > SIZE THEN OUTSTR (" ADD1 FAILED " );
TEMP ← ARG[I] + CARRY;
ARG[I] ← TEMP LAND GOODMASK;
CARRY ← (TEMP LAND OVMASK) LSH - BPW;
I ← I + 1;
END;
END "ADD1";
PROCEDURE ADD(SAFE INTEGER ARRAY C, A, B; VALUE INTEGER CARRY);
COMMENT: C ← A + B + CARRY (MOD 2↑M + 1).
THE GLOBALS USED ARE:
N: THE N OF THE BOOK.
BPW: BITS PER WORD. MUST BE SMALLER THAN FULL WORD.
LSTWORD: N DIV BPW.
OFFSET: N MOD BPW.
OVMASK: 1 LSH BPW.
GOODMASK: OVMASK - 1.
LSTOVMASK: 1 LSH OFFSET.
LSTGDMASK: LSTOVMASK - 1.
;
BEGIN "ADD"
INTEGER TEMP, BITSSEEN; COMMENT: "PE" VARIABLES;
INTEGER I; COMMENT: "CU" VARIABLE;
INTEGER ALOC, BLOC, CLOC; COMMENT: "PE";
ALOC ← LOCATION(A[0]);
BLOC ← LOCATION(B[0]);
CLOC ← LOCATION(C[0]);
BITSSEEN ← 0;
FOR I ← 0 STEP 1 UNTIL LSTWORD DO
BEGIN COMMENT: ADD A[I] TO B[I];
TEMP ← MEMORY[ALOC+I] + MEMORY[BLOC+I] + CARRY;
CARRY ← TEMP LSH -BPW;
BITSSEEN ← BITSSEEN LOR (MEMORY[CLOC+I]←TEMP LAND GOODMASK);
END;
TEMP ← MEMORY[ALOC+SIZE] + MEMORY[BLOC+SIZE] + CARRY;
CARRY ← TEMP LSH -OFFSET;
BITSSEEN ← BITSSEEN LOR (MEMORY[CLOC+SIZE] ← TEMP LAND LSTGDMASK);
IF CARRY AND BITSSEEN=0 THEN
BEGIN COMMENT: -1 OR -2 IS THE ANSWER;
MEMORY[CLOC+SIZE] ← LSTOVMASK;
CARRY ← CARRY - 1;
END;
I ← 0;
WHILE CARRY DO
BEGIN COMMENT: SUBTRACT CARRIES. CANNOT OVERFLOW;
TEMP ← MEMORY[CLOC+I] + OVMASK - CARRY;
MEMORY[CLOC+I] ← TEMP LAND GOODMASK;
CARRY ← 1 - (TEMP LSH -BPW);
I ← I + 1;
END;
END "ADD";
SIMPLE INTEGER PROCEDURE REVERSE( VALUE INTEGER ARG);
COMMENT procedure to invert the SK least significant bits of ARG;
BEGIN COMMENT: Use the binary chop method;
DEFINE M2 = "'777000777000";
DEFINE M2C= "'000777000777";
DEFINE M3 = "'740740740740";
DEFINE M3C= "'017017017017";
DEFINE M3F= "'020020020020";
DEFINE M4 = "'614614614614";
DEFINE M4C= "'142142142142";
DEFINE M5 = "'512512512512";
DEFINE M5C= "'245245245245";
ARG ← ARG ROT 18;
ARG ← ((ARG LAND M2) LSH -9) LOR ((ARG LAND M2C) LSH 9);
ARG ← ((ARG LAND M3) LSH -5) LOR ((ARG LAND M3C) LSH 5)
LOR (ARG LAND M3F);
ARG ← ((ARG LAND M4) LSH -2) LOR ((ARG LAND M4C) LSH 2)
LOR (ARG LAND M3F);
ARG ← ((ARG LAND M5) LSH -1) LOR ((ARG LAND M5C) LSH 1)
LOR (ARG LAND M3F);
RETURN(ARG LSH (SK-36));
END;
PROCEDURE FFT1 (SAFE INTEGER ARRAY A; REFERENCE INTEGER X);
BEGIN "FFT1"
COMMENT This is the forward version of the Fast Fourier Transform
routine to be used in multiplying large numbers according to the
Strassen - Schonhage technique (see Knuth v.2 p.269). The con-
itions of the problem require that we have OMEGA↑(2↑M) = 1 (mod M).
Note that since w is always a power of 2, the multiplication
appearing in the inner loop is really a shift.;
INTEGER EXPMASK, CURBIT, NEGBIT, Y, TEMPA, J; COMMENT: "CU" TYPE;
INTEGER I; COMMENT: "PE" TYPE;
SAFE INTEGER ARRAY A1, A2, A3 [-2:SIZE+2]; COMMENT: TEMPS;
A1[-2] ← A1[-1] ← A1[SIZE+1] ← A1[SIZE+2] ← 0;
X ← 0;
EXPMASK ← 0; COMMENT MASKS OFF THE J LEAST SIGNIFICANT BITS;
CURBIT ← K; COMMENT WE TAKE LINEAR COMBINATIONS OF A'S
CURBIT APART;
FOR J ← 1 STEP 1 UNTIL SK DO
BEGIN Y ← X; X ← 1-X;
CURBIT ← CURBIT LSH -1;
EXPMASK ← (EXPMASK LSH 1) + 1;
NEGBIT ← LNOT CURBIT;
FOR I ← 0 STEP 1 UNTIL K-1 DO
BEGIN COMMENT: HERE IS THE INNER LOOP;
ARRBLT(A1[0], A[Y,I LOR CURBIT,0], SIZE+1);
SHIFT(A2, A1, OMEGA*((REVERSE(I) LAND EXPMASK) LSH (SK-J)));
ARRBLT(A1[0], A[Y, I LAND NEGBIT, 0], SIZE+1);
ADD(A3, A1, A2, CARRY);
ARRBLT(A[X,I,0], A3[0], SIZE+1);
END;
END;
COMMENT the array A now contains the result of the FFT
in a permuted order: A[i] really contains A[reverse(i)]. Rather
than permute, we use FFT2 for the backwards application with
the net effect that the final result is in correct order.;
END "FFT1";
PROCEDURE FFT2 (SAFE INTEGER ARRAY A; REFERENCE INTEGER X);
BEGIN "FFT2"
COMMENT This is the backward version of the Fast Fourier Transform
routine to be used in multiplying large numbers according to the
Strassen - Schonhage technique (see Knuth v.2 p.269). The con-
itions of the problem require that we have OMEGA↑(2↑M) = 1 (mod M).
Note that since w is always a power of 2, the multiplication
appearing in the inner loop is really a shift.;
INTEGER EXPMASK, CURBIT, NEGBIT, Y, TEMPA, J; COMMENT: "CU" TYPE;
INTEGER I; COMMENT: "PE" TYPE;
SAFE INTEGER ARRAY A1, A2, A3 [-2:SIZE+2]; COMMENT: TEMPS;
A1[-2] ← A1[-1] ← A1[SIZE+1] ← A1[SIZE+2] ← 0;
X ← 0;
EXPMASK ← 0; COMMENT MASKS OFF THE J LEAST SIGNIFICANT BITS;
CURBIT ← 1; COMMENT WE TAKE LINEAR COMBINATIONS OF A'S
CURBIT APART;
FOR J ← 1 STEP 1 UNTIL SK DO
BEGIN Y ← X; X ← 1-X;
EXPMASK ← (EXPMASK LSH 1) + 1;
NEGBIT ← LNOT CURBIT;
FOR I ← 0 STEP 1 UNTIL K-1 DO
BEGIN COMMENT: HERE IS THE INNER LOOP;
ARRBLT(A1[0], A[Y,I LOR CURBIT,0], SIZE+1);
SHIFT(A2, A1, OMEGA*((I LAND EXPMASK) LSH (SK-J)));
ARRBLT(A1[0], A[Y, I LAND NEGBIT, 0], SIZE+1);
ADD(A3, A1, A2, CARRY);
ARRBLT(A[X,I,0], A3[0], SIZE+1);
END;
CURBIT ← CURBIT LSH 1;
END;
COMMENT the array A now contains the result of the FFT
in a proper order given that A originally had permuted order.;
END "FFT2";
PROCEDURE PARTA (SAFE INTEGER ARRAY W, U, V);
BEGIN "PARTA"
COMMENT: Both U and V are three-dimensional: [X,I,J] where X=1
or 2, 0 ≤ I < K, and 0 ≤ J ≤ SIZE. In the notation of the
book, U[X,I,*] is U(i). (X is just a flag ). Part A
Calculates W' from U and V, and returns the answer as W[I],
where 0 ≤ I < K. This is done by first extracting the
lower-order SK bits from each U(I) and V(I), and then
performing the strange multiplication of equation 4.3.3(36),
p. 271.
;
COMMENT: THIS IS THE MEANING OF THE GLOBALS:
BPW: BITS PER COMPLETE WORD. MUST BE SMALLER THAN FULL WORD.
SK: THE (SMALL) k OF THE BOOK.
K: THE K OF THE BOOK.
;
INTEGER ILIM, JLIM, I, J, TEMP, TEMPA, TEMPB;
SAFE INTEGER ARRAY A, B [0:K-1];
COMMENT: A and B are copies of U and V;
IF SK ≥ 12 THEN OUTSTR (" PARTA FAILED. SK TOO LARGE. ");
ILIM ← K - 1; COMMENT: Also used as a mask of SK bits;
JLIM ← (SK-1) DIV BPW;
FOR I ← 0 STEP 1 UNTIL ILIM DO
BEGIN COMMENT: Transfer U(I), V(I) into A(I) and B(I);
TEMPA ← TEMPB ← 0;
FOR J ← 0 STEP 1 UNTIL JLIM DO
BEGIN COMMENT: We need exactly SK bits;
TEMPA ← (TEMPA ROT -BPW) + U[0,I,J];
TEMPB ← (TEMPB ROT -BPW) + V[0,I,J];
END;
A[I] ← (TEMPA ROT (BPW*JLIM)) LAND ILIM;
B[I] ← (TEMPB ROT (BPW*JLIM)) LAND ILIM;
END;
COMMENT: Zero out W;
ARRCLR(W);
COMMENT: Forward multiplication;
FOR I ← 0 STEP 1 UNTIL ILIM DO
BEGIN
JLIM ← K - I - 1;
FOR J ← 0 STEP 1 UNTIL JLIM DO
W[I+J] ← W[I+J] + A[J]*B[I];
END;
COMMENT: Backward multiplication;
FOR I ← ILIM STEP -1 UNTIL 1 DO
BEGIN
JLIM ← K - I;
FOR J ← ILIM STEP -1 UNTIL JLIM DO
BEGIN
TEMP ← I + J - K;
W[TEMP] ← (K + W[TEMP] - (A[J]*B[I] LAND ILIM));
COMMENT: We add K to assure result positive mod K;
END;
END;
FOR I ← 0 STEP 1 UNTIL ILIM DO
BEGIN
COMMENT: Mask off the last SK bits of C(I);
W[I] ← W[I] LAND ILIM;
END;
END "PARTA";
PROCEDURE MULT (SAFE INTEGER ARRAY ANSWER, ARG1, ARG2);
BEGIN "MULT"
COMMENT: All the arguments are one-dimensional,
[0,SIZE]. They are to be multiplied mod MODULUS, and
the answer returned in a similar array.
The method requires that BPW not be so large that
multiplication of two numbers of BPW bits overflow.
;
INTEGER I, J, TEMP, CARRY;
SAFE INTEGER ARRAY INT [0:2*SIZE+1];
ARRCLR(INT);
FOR I ← 0 STEP 1 UNTIL SIZE DO
FOR J ← 0 STEP 1 UNTIL SIZE DO
INT[I+J] ← INT[I+J] + ARG1[I]*ARG2[J];
CARRY ← 0;
FOR I ← 0 STEP 1 UNTIL 2*SIZE + 1 DO
BEGIN COMMENT: Propagate carries;
TEMP ← INT[I] + CARRY;
INT[I] ← TEMP LAND GOODMASK;
CARRY ← TEMP LSH -BPW;
END;
IF CARRY THEN OUTSTR(" @ BUG IN MULT");
FOR I ← 0 STEP 1 UNTIL LSTWORD DO
BEGIN COMMENT: Subtract high digit from low digit;
TEMP ← INT[I] + OVMASK - CARRY
- (INT[I+SIZE] LSH -OFFSET)
- ((INT[I+SIZE+1] LSH (BPW - OFFSET)) LAND GOODMASK);
ANSWER[I] ← TEMP LAND GOODMASK;
CARRY ← 1 - (TEMP LSH -BPW);
END;
COMMENT: Handle last word separately;
TEMP ← (INT[SIZE] LAND LSTGDMASK) + LSTOVMASK - CARRY
- (INT[2*SIZE] LSH -OFFSET)
- (INT[2*SIZE+1] LSH (BPW - OFFSET));
ANSWER[SIZE] ← TEMP LAND LSTGDMASK;
CARRY ← 1 - (TEMP LSH -OFFSET);
I ← 0;
WHILE CARRY ∧ I<SIZE DO
BEGIN COMMENT: If there is a carry, it must be added back;
TEMP ← ANSWER[I] + CARRY;
ANSWER[I] ← TEMP LAND GOODMASK;
CARRY ← TEMP LSH -BPW;
I ← I + 1;
END;
IF CARRY THEN
COMMENT: Handle carry into last word separately;
ANSWER[SIZE] ← ANSWER[SIZE] + CARRY;
END "MULT";
PROCEDURE PARTB(SAFE INTEGER ARRAY W, U, V);
BEGIN "PARTB"
COMMENT: Both U and V are three-dimensional: [X,I,J] where X=1
or 2, 0 ≤ I < K, and 0 ≤ J ≤ SIZE. In the notation of the
book, U[X,I,*] is U(i). (X is just a flag ). Part B
CalCulates W'' from U nd V, and returns the answer as
W[0,*,*]. This is done by first multiplying U(i) and V(i) by
PSI↑i, then forming U~ and V~, the FFT1 of U and V
respectively, then multiplying W~(i)=U~(i)*V~(i) (mod
MODULUS), then taking the FFT2 of W~, yielding W, then doing
appropriate multiplications on W to yield the result. W will
be in this format: W[0,K-i,*] = W(i) for 0 ≤ i < K.
;
SAFE INTEGER ARRAY TEMP2, TEMP3 [0:SIZE];
SAFE INTEGER ARRAY TEMP1 [-2:SIZE+2]; COMMENT: Needed for shifting;
INTEGER I; COMMENT: In all loops on I, the iterations should be
distributed among PEs;
INTEGER X; COMMENT: Flag returned by FFT: 0 or 1;
TEMP1[-2] ← TEMP1[-1] ← TEMP1[SIZE+1] ← TEMP1[SIZE+2] ← 0;
FOR I ← 1 STEP 1 UNTIL K-1 DO
BEGIN COMMENT: Multiply U(I), V(I) by PSI↑I;
ARRBLT (TEMP1[0],U[0,I,0],SIZE+1);
SHIFT (TEMP2,TEMP1,PSI*I);
IF CARRY THEN ADD1 (TEMP2);
ARRBLT (U[0,I,0],TEMP2[0],SIZE+1);
ARRBLT (TEMP1[0],V[0,I,0],SIZE+1);
SHIFT (TEMP2,TEMP1,PSI*I);
IF CARRY THEN ADD1 (TEMP2);
ARRBLT (V[0,I,0],TEMP2[0],SIZE+1);
END;
FFT1(U,X);
FFT1(V,X);
FOR I ← 0 STEP 1 UNTIL K-1 DO
BEGIN COMMENT: Form W~ by multiplying;
ARRBLT (TEMP1[0], V[X,I,0], SIZE+1);
ARRBLT (TEMP2[0], U[X,I,0], SIZE+1);
MULT (TEMP3, TEMP1, TEMP2);
ARRBLT (U[0,I,0], TEMP3[0], SIZE+1);
END;
FFT2(U,X);
FOR I ← 1 STEP 1 UNTIL K-1 DO
BEGIN COMMENT: Rescale W by shifting;
ARRBLT (TEMP1[0], U[X,I,0], SIZE+1);
SHIFT (TEMP2, TEMP1, 2*M - SK - PSI*(K-I));
IF CARRY THEN ADD1 (TEMP2);
ARRBLT (W[I,0], TEMP2[0], SIZE+1);
END;
COMMENT: Take care of W(0), which goes at end;
ARRBLT (TEMP1[0], U[X,0,0], SIZE+1);
SHIFT (TEMP2, TEMP1, 2*M - SK);
IF CARRY THEN ADD1 (TEMP2);
ARRBLT (W[K,0], TEMP2[0], SIZE+1);
END "PARTB";
SIMPLE BOOLEAN PROCEDURE LESS (SAFE INTEGER ARRAY A, B; INTEGER SIZE);
BEGIN "LESS"
COMMENT: this routine is a long-precision LESS.
Both A and B are [0:SIZE]. Returns TRUE <=> A ≤ B.
;
BOOLEAN FLAG; INTEGER I;
FLAG ← TRUE;
I ← SIZE;
WHILE (FLAG AND (I GEQ 0)) DO
BEGIN
FLAG ← FLAG AND (A[I] ≤ B[I]);
I ← I-1;
END;
RETURN(FLAG);
END "LESS";
PROCEDURE PARTC (SAFE INTEGER ARRAY W, W1, W2);
BEGIN "PARTC"
COMMENT: Forms W out of W1 and W2 as in 4.3.3(35).
This is done independently for each W(R) as follows:
The SK-bit quantity W1(R)-W2(R) is formed, then multiplied by
2↑(2l)+1 by a shift and an addition, then W2 is added (now keeping
2L+SK bits). Then the result is compared with (R+1)2↑(2L), and
if it is greater, some is subtracted off.
W1 is of the form [0:K-1], where W1(r) = W1[r].
W2 is of the form [X,I,J], where W2(r) = W2[0,K-r,*].
W is of the form [I,J], where W(r) = W[r,*] and 0 ≤ J ≤ BIGSIZE,
where BIGSIZE is large enough to hold 2L+SK bits.
;
INTEGER TEMPA, TEMPB, R, INT, J, JLIM, ILIM;
SAFE INTEGER ARRAY TEMP1, TEMP2, TEMP3, TEMP4 [-2:BIGSIZE + 2];
SAFE INTEGER ARRAY SIGNW [0:K-1];
INTEGER SAVOFFSET, SAVSIZE, SAVLSTOVMASK, SAVLSTWORD, SAVM,SAVLSTGDMASK;
COMMENT: CHANGE GLOBALS;
SAVM ← M; M ← 2*L + SK + 1;
SAVOFFSET ← OFFSET; OFFSET ← M MOD BPW;
SAVSIZE ← SIZE; SIZE ← BIGSIZE;
SAVLSTOVMASK ← LSTOVMASK; LSTOVMASK ← 1 LSH OFFSET;
SAVLSTGDMASK ← LSTGDMASK; LSTGDMASK ← LSTOVMASK - 1;
SAVLSTWORD ← LSTWORD; LSTWORD ← BIGSIZE - 1;
JLIM ← (SK-1) DIV BPW;
ILIM ← K - 1; COMMENT: Also used as an SK-bit mask;
FOR R ← 0 STEP 1 UNTIL ILIM DO
BEGIN COMMENT: Take care of W(R);
TEMPA ← 0;
FOR J ← 0 STEP 1 UNTIL JLIM DO
COMMENT: We need exactly SK bits;
TEMPA ← (TEMPA ROT -BPW) + W2[K-R,J];
INT ← (K + W1[R] - ((TEMPA ROT (BPW*JLIM)) LAND ILIM)) LAND ILIM;
ARRCLR(TEMP1);
ARRCLR(TEMP2);
ARRCLR(TEMP3);
ARRCLR(TEMP4);
FOR J ← 0 STEP 1 UNTIL JLIM DO
BEGIN COMMENT: Store into TEMP1;
TEMP1[J] ← INT LAND GOODMASK;
INT ← INT LSH -BPW;
END;
SHIFT (TEMP2, TEMP1, 2*L);
ADD (TEMP3, TEMP1, TEMP2, 0);
TEMP1[SIZE] ← 0;
ARRBLT (TEMP1[0], W2[K-R,0], SAVSIZE+1);
ADD (TEMP2, TEMP1, TEMP3, 0);
COMMENT: Now we must prepare (R+1)2↑(2L) for the comparison;
TEMP1[-2] ← 0;
ARRBLT (TEMP1[-1], TEMP1[-2], BIGSIZE+3);
INT ← R + 1;
FOR J ← 0 STEP 1 UNTIL JLIM +1 DO
BEGIN COMMENT: Store R+1 into TEMP1;
TEMP1[J] ← INT LAND GOODMASK;
INT ← INT LSH -BPW;
END;
SHIFT (TEMP3, TEMP1, 2*L);
IF LESS (TEMP3, TEMP2, BIGSIZE) THEN
BEGIN COMMENT: Must subtract 2↑SK*(2↑2L + 1).
Do all arithmetic holding 2L+SK+1 bits;
INTEGER WORD, BIT;
WORD ← ((2*L+SK) DIV BPW);
BIT ← (2*L+SK) MOD BPW;
TEMP4[WORD] ← 1 LSH BIT;
WORD ← (SK DIV BPW);
BIT ← SK MOD BPW;
TEMP4[WORD] ← 1 LSH BIT;
IF LESS (TEMP2, TEMP4, BIGSIZE)
THEN BEGIN
SHIFT(TEMP1, TEMP2, 2*L+SK+1);
ADD(TEMP3, TEMP1, TEMP4, 0);
SIGNW[R] ← -1;
END
ELSE BEGIN
SHIFT(TEMP1, TEMP4, 2*L+SK+1);
ADD(TEMP3, TEMP1, TEMP2, 0);
SIGNW[R] ← 1;
END;
ARRBLT (W[R,0], TEMP3[0], BIGSIZE + 1);
END
ELSE ARRBLT (W[R,0], TEMP2[0], BIGSIZE+1);
END;
COMMENT: RESTORE GLOBALS;
M ← SAVM;
OFFSET ← SAVOFFSET;
LSTGDMASK ← SAVLSTGDMASK;
LSTOVMASK ← SAVLSTOVMASK;
SIZE ← SAVSIZE;
LSTWORD ← SAVLSTWORD;
END "PARTC";
PROCEDURE EQ34 (SAFE INTEGER ARRAY ANSWER, W; REFERENCE INTEGER SCALEA;
INTEGER SCALE1, SCALE2; REFERENCE INTEGER EXPA;
INTEGER EXP1, EXP2);
BEGIN "EQ34"
COMMENT this file contains the code for performing the final
step of the Strassen - Schoenhage multiplication procedure, eq. (34)
of 4.3.3 in Knuth. The key control structure used is the bit pro-
ducing coroutine. Note that at most 3 of the W's can overlap at
any given point. We link the W's into 3 bitstreams that are added
together to produce the final result. ANSWER is [0:1,0:K-1,0:SIZE].
W is [0:K-1,0:BIGSIZE].
;
ITEM ADDSTREAM, PACKSTREAM;
INTEGER ITEMVAR X;
SAFE INTEGER ARRAY SIGN [-2:0];
INTEGER I, J;
ITEMVAR ARRAY STREAM[-2:0], PUMPW[-2:K+2];
RECURSIVE PROCEDURE DELIVERW ( INTEGER I);
COMMENT this procedure is a coroutine that delivers the bits
of the i-th W;
BEGIN "DELW"
INTEGER TEMP, MASK, J; MASK ← 1;
FOR J ← 0 STEP 1 UNTIL BIGSIZE DO
BEGIN INTEGER K;
TEMP ← W[I,J];
FOR K ← 1 STEP 1 UNTIL BPW DO
BEGIN
DATUM(RITEM) ← TEMP LAND MASK;
RESUME(CALLER(MYPROC), RITEM);
TEMP ← TEMP LSH -1;
END;
END;
END "DELW";
RECURSIVE PROCEDURE BITSTREAM ( INTEGER I);
COMMENT this is also a coroutine that delivers bits. There will be 3
incarnations of this procedure, each delivering the bits of all
W[I] such that I MOD 3 = 0, 1, or 2 respectively. For correct
alignment L-SK 0 bits are inserted between successive W's;
BEGIN "BITSTM"
INTEGER OLDI; OLDI ← I;
WHILE I < K+3 DO
BEGIN INTEGER J;
SPROUT(PUMPW[I], DELIVERW(I), SUSPHIM+RUNME);
FOR J ← 1 STEP 1 UNTIL P DO
BEGIN
X ← RESUME(PUMPW[I]);
RESUME(ADDSTREAM,X);
END;
TERMINATE(PUMPW[I]);
FOR J ← 1 STEP 1 UNTIL L-SK-1 DO
BEGIN
DATUM(RITEM) ← 0;
RESUME(ADDSTREAM, RITEM);
END;
I ← I + 3; SIGN[OLDI] ← SIGNW[I];
END;
END "BITSTM";
RECURSIVE PROCEDURE ADDER;
COMMENT in this coroutine the bitstreams produced by the 3 in-
carnation of BITSTREAM are added together;
BEGIN "ADDER"
INTEGER I, J, MASK, CARRY; MASK ← 1; CARRY ← 0;
FOR I ← -2 STEP 1 UNTIL 0 DO SPROUT(STREAM[I], BITSTREAM(I), SUSPHIM+RUNME);
COMMENT start up the 3 coroutines;
SIGN[0] ← SIGNW[0];
FOR J ← 1 STEP 1 UNTIL L DO
BEGIN
RESUME(STREAM[-1]);
END;
FOR J ← 1 STEP 1 UNTIL 2*L DO
BEGIN
RESUME(STREAM[-2]);
END;
COMMENT align the bitstreams;
FOR J ← 1 STEP 1 UNTIL N+L+SK+1 DO
BEGIN INTEGER TEMP;
INTEGER XM2, XM1, X0;
XM2 ← DATUM(X←RESUME(STREAM[-2]));
XM1 ← DATUM(X←RESUME(STREAM[-1]));
X0 ← DATUM(X←RESUME(STREAM[0]));
TEMP ← XM2*SIGN[-2]+XM1*SIGN[-1]+X0*SIGN[0]+CARRY;
DATUM(RITEM) ← TEMP LAND MASK;
RESUME(PACKSTREAM, RITEM);
CARRY ← (TEMP DIV 2);
COMMENT note that we need an arithmetic right shift by 1;
END;
END "ADDER";
RECURSIVE PROCEDURE PACK (SAFE INTEGER ARRAY A);
BEGIN "PACK"
INTEGER I, TEMP, J, BITS, COUNT;
SCALEA ← SCALE1 + SCALE2;
EXPA ← EXP1 + EXP2 - OGDPT;
IF SCALEA GEQ (N DIV 2) - 4 THEN
BEGIN
INTEGER TEMP; COMMENT: Places discarded;
TEMP ← SCALEA - (N DIV 2) + 5;
FOR I ← 1 STEP 1 UNTIL TEMP DO
RESUME (ADDSTREAM);
SCALEA ← SCALEA - TEMP;
EXPA ← EXPA + TEMP;
END;
FOR I ← 0 STEP 1 UNTIL K-1 DO
BEGIN
J ← COUNT ← 0;
WHILE TRUE DO
BEGIN "COLUMN"
TEMP ← 0;
FOR BITS ← 1 STEP 1 UNTIL BPW DO
BEGIN
TEMP ← (TEMP + DATUM
(X←RESUME(ADDSTREAM))) ROT -1;
COUNT ← COUNT + 1;
IF COUNT = L THEN
BEGIN
A[0,I,J] ← TEMP ROT
(((L-1) MOD BPW)+1);
DONE "COLUMN"
END
END;
A[0,I,J] ← TEMP ROT BPW;
J ← J + 1;
END "COLUMN"
END;
END "PACK";
FOR I ← -2 STEP 1 UNTIL 0 DO STREAM[I] ← NEW;
FOR I ← -2 STEP 1 UNTIL K+2 DO PUMPW[I] ← NEW;
SPROUT(ADDSTREAM,ADDER,SUSPHIM+RUNME);
SPROUT(PACKSTREAM,PACK(ANSWER));
FOR I ← -2 STEP 1 UNTIL 0 DO
BEGIN
SPROUT (PUMPW[K+2+I],DELIVERW(K+2+I),SUSPHIM+RUNME);
TERMINATE(STREAM[I]);
DELETE(STREAM[I]);
END;
FOR I ← -2 STEP 1 UNTIL K+2 DO
BEGIN
TERMINATE(PUMPW[I]);
DELETE(PUMPW[I]);
END;
TERMINATE(PACKSTREAM);
TERMINATE(ADDSTREAM);
END "EQ34";
PROCEDURE MULTIPLY (SAFE INTEGER ARRAY ANSWER, ARG1, ARG2;
REFERENCE INTEGER SCALEA; INTEGER SCALE1, SCALE2;
REFERENCE INTEGER EXPA; INTEGER EXP1, EXP2);
BEGIN "MULTIP"
COMMENT: This is the world-renowned algorithm of Shonhage-
Strassen. The arguments and answer are in this packed
form: [X,I,J] where 0 ≤ X ≤ 1 (but X=0 has the interesting
numbers), 0 ≤ I < K, and 0 ≤ J ≤ SIZE, where SIZE is big
enough to hold L bits in words of width BPW. Here is the
meaning of all the globals:
OMEGA: SPECIAL FACTOR. USED IN FFT.
PSI: SPECIAL FACTOR. USED IN STEP B. AS IN BOOK.
NOTE: This routine clobbers ARG1 and ARG2!
;
SAFE INTEGER ARRAY W1 [0:K-1];
SAFE INTEGER ARRAY W2 [0:K,0:SIZE];
COMMENT: Remember that W2(i) is at W2[K-i,*];
SAFE INTEGER ARRAY W [-2:K+2,0:BIGSIZE];
INTEGER I;
ARRCLR(W);
PSI ← 1 LSH (SL + 1 - SK);
OMEGA ← PSI LSH 1;
PARTA (W1, ARG1, ARG2);
PARTB (W2, ARG1, ARG2);
FOR I ← -2 STEP 1 UNTIL K+2 DO
SIGNW[I] ← 1;
PARTC (W, W1, W2);
ARRCLR(ANSWER);
EQ34 (ANSWER, W, SCALEA, SCALE1, SCALE2, EXPA, EXP1, EXP2);
END "MULTIP";
PROCEDURE SCALE (INTEGER ARRAY R, A; INTEGER RSHIFT);
BEGIN "SCALE"
COMMENT this procedure shifts array A to the right
by RSHIFT bits and places the result in R.
It is used for scaling properly the arguments
of an addition or a subtraction.;
INTEGER I, J, COUNT, TEMP, BITS;
INTEGER ITEM BITSTREAM;
INTEGER ITEMVAR X;
OUTPUT("ENTERING SCALE, ARG IS ", A);
OUTSTR("RSHIFT = "&CVS(RSHIFT));
SPROUT(BITSTREAM, DELIVER(A), SUSPHIM+RUNME);
FOR I ← 1 STEP 1 UNTIL RSHIFT DO
RESUME(BITSTREAM); COMMENT: Waste RSHIFT bits;
FOR I ← 0 STEP 1 UNTIL (K DIV 2) DO
BEGIN "ILOOP"
J ← COUNT ← 0;
WHILE TRUE DO
BEGIN "COLUMN"
TEMP ← 0;
FOR BITS ← 1 STEP 1 UNTIL BPW DO
BEGIN
TEMP ← (TEMP + DATUM
(X←RESUME(BITSTREAM))) ROT -1;
COUNT ← COUNT + 1;
IF COUNT = L THEN
BEGIN
R[0,I,J] ← TEMP ROT
(((L-1) MOD BPW)+1);
DONE "COLUMN"
END
END;
R[0,I,J] ← TEMP ROT BPW;
J ← J + 1;
END "COLUMN"
END;
TERMINATE(BITSTREAM);
END "SCALE";
PROCEDURE LSUB (SAFE INTEGER ARRAY R,A,B; REFERENCE INTEGER SCALER);
BEGIN "LSUB"
COMMENT: R ← A - B (MOD MODULUS).
All arrays are [X,I,J] where
X=0, 0 ≤ I < K, 0 ≤ J ≤ SIZE.
;
INTEGER CARRY, TEMP; COMMENT: "PE" TYPE;
INTEGER I, J; COMMENT: "CU" type;
INTEGER HIGHWORD, ADDAMOUNT, SAVESC;
INTEGER SOFFSET, SLSTGDMASK, SLSTWORD;
INTEGER SLSTOVMASK, SSIZE;
SAVESC ← SCALER ← 0;
HIGHWORD ← 0;
ADDAMOUNT ← 0;
CARRY ← 0;
SOFFSET ← OFFSET; OFFSET ← (L MOD BPW);
SLSTGDMASK ← LSTGDMASK; LSTGDMASK ← (1 LSH OFFSET) - 1;
SLSTWORD ← LSTWORD; LSTWORD ← (L DIV BPW) - 1;
SSIZE ← SIZE; SIZE ← LSTWORD + 1;
SLSTOVMASK ← LSTOVMASK; LSTOVMASK ← 1 LSH OFFSET;
FOR I ← 0 STEP 1 UNTIL K-1 DO
BEGIN "ILOOP"
FOR J ← 0 STEP 1 UNTIL LSTWORD DO
BEGIN "JLOOP"
TEMP ← OVMASK + A[0,I,J] - B[0,I,J]
- CARRY;
R[0,I,J] ← TEMP LAND GOODMASK;
CARRY ← 1 - (TEMP LSH -BPW);
SAVESC ← SAVESC + ADDAMOUNT;
IF TEMP LAND GOODMASK
THEN BEGIN
HIGHWORD ← TEMP LAND GOODMASK;
SCALER ← SAVESC;
END;
ADDAMOUNT ← BPW;
END;
IF OFFSET THEN
BEGIN "OFFSET"
TEMP ← LSTOVMASK + A[0,I,SIZE]
- B[0,I,SIZE] - CARRY;
R[0,I,SIZE] ← TEMP LAND LSTGDMASK;
CARRY ← 1 - (TEMP LSH -OFFSET);
SAVESC ← SAVESC + ADDAMOUNT;
IF TEMP LAND LSTGDMASK
THEN BEGIN
HIGHWORD ← TEMP LAND LSTGDMASK;
SCALER ← SAVESC;
END;
ADDAMOUNT ← OFFSET;
END;
END;
SAVESC ← 0;
FOR I ← 0 STEP 1 UNTIL BPW-1 DO
BEGIN COMMENT: LOOK FOR POS OF HIGHEST 1 BIT;
IF HIGHWORD LAND (1 LSH I) THEN SAVESC ← I;
END;
SCALER ← SCALER + SAVESC;
LSTGDMASK ← SLSTGDMASK;
OFFSET ← SOFFSET;
LSTWORD ← SLSTWORD;
LSTOVMASK ← SLSTOVMASK;
SIZE ← SSIZE;
END "LSUB";
PROCEDURE LADD (SAFE INTEGER ARRAY R,A,B; REFERENCE INTEGER SCALER);
BEGIN "LADD"
COMMENT: R ← A + B (MOD MODULUS).
All arrays are [X,I,J] where
X=0, 0 ≤ I < K, 0 ≤ J ≤ SIZE.
;
INTEGER CARRY, TEMP; COMMENT: "PE" TYPE;
INTEGER I, J; COMMENT: "CU" type;
INTEGER HIGHWORD, ADDAMOUNT, SAVESC;
INTEGER SOFFSET, SLSTGDMASK, SLSTWORD;
INTEGER SLSTOVMASK, SSIZE;
SAVESC ← SCALER ← 0;
HIGHWORD ← 0;
ADDAMOUNT ← 0;
CARRY ← 0;
SOFFSET ← OFFSET; OFFSET ← (L MOD BPW);
SLSTGDMASK ← LSTGDMASK; LSTGDMASK ← (1 LSH OFFSET) - 1;
SLSTWORD ← LSTWORD; LSTWORD ← (L DIV BPW) - 1;
SSIZE ← SIZE; SIZE ← LSTWORD + 1;
SLSTOVMASK ← LSTOVMASK; LSTOVMASK ← 1 LSH OFFSET;
FOR I ← 0 STEP 1 UNTIL K-1 DO
BEGIN "ILOOP"
FOR J ← 0 STEP 1 UNTIL LSTWORD DO
BEGIN "JLOOP"
TEMP ← A[0,I,J] + B[0,I,J] + CARRY;
R[0,I,J] ← TEMP LAND GOODMASK;
CARRY ← TEMP LSH -BPW;
SAVESC ← SAVESC + ADDAMOUNT;
IF TEMP LAND GOODMASK
THEN BEGIN
HIGHWORD ← TEMP LAND GOODMASK;
SCALER ← SAVESC;
END;
ADDAMOUNT ← BPW;
END;
IF OFFSET THEN
BEGIN "OFFSET"
TEMP ← A[0,I,SIZE] + B[0,I,SIZE] + CARRY;
R[0,I,SIZE] ← TEMP LAND LSTGDMASK;
CARRY ← TEMP LSH -OFFSET;
SAVESC ← SAVESC + ADDAMOUNT;
IF TEMP LAND LSTGDMASK
THEN BEGIN
HIGHWORD ← TEMP LAND LSTGDMASK;
SCALER ← SAVESC;
END;
ADDAMOUNT ← OFFSET;
END;
END;
SAVESC ← 0;
FOR I ← 0 STEP 1 UNTIL BPW-1 DO
BEGIN COMMENT: LOOK FOR POS OF HIGHEST 1 BIT;
IF HIGHWORD LAND (1 LSH I) THEN SAVESC ← I;
END;
SCALER ← SCALER + SAVESC;
LSTGDMASK ← SLSTGDMASK;
OFFSET ← SOFFSET;
LSTWORD ← SLSTWORD;
LSTOVMASK ← SLSTOVMASK;
SIZE ← SSIZE;
END "LADD";
SIMPLE PROCEDURE BITON(INTEGER K; INTEGER ARRAY A);
IF K ≥ 0 THEN
BEGIN INTEGER COL, ROW, BIT, TEMP;
COL ← K DIV L;
ROW ← (TEMP ← K MOD L) DIV BPW;
BIT ← TEMP MOD BPW;
A[0,COL,ROW] ← A[0,COL,ROW] LOR (1 LSH BIT);
END;
COMMENT: The driver continues here;
INTEGER OGDPT, ITERNO, EXPS, EXPA, EXPB, EXPASAV, EXPBSAV,
EXPRCG, EXPROG, SCLS, SCLA, SCLB, SCLASAV,
EXPC, SCLBSAV, SCLRCG, SCLROG, SCLC;
INTEGER ARRAY S, A, B, C, ASAV, BSAV, THREE, RCG, ROG
[0:1, 0:K-1, 0:SIZE];
OUTSTR("ITERNO="); ITERNO ← INTIN(0);
OUTSTR("OGDPT="); OGDPT←3*INTIN(0);
ARRCLR(A); COMMENT: A ← ROG ← RCG ← 1;
SCLA ← (N DIV 2) -1;
EXPA ← OGDPT - SCLA;
BITON(OGDPT-EXPA, A);
ARRCLR(BSAV); COMMENT: BSAV ← 2;
EXPBSAV ← 0;
SCLBSAV ← 1 + OGDPT;
BITON(1-EXPB+OGDPT, BSAV);
ARRBLT(ROG[0,0,0], A[0,0,0], 2*K*(SIZE+1));
SCLROG ← SCLA; EXPROG ← EXPA;
ARRBLT(RCG[0,0,0], A[0,0,0], 2*K*(SIZE+1));
SCLRCG ← SCLA; EXPRCG ← EXPA;
ARRBLT(S[0,0,0],A[0,0,0], 2*K* (SIZE+1));
EXPS ← EXPA - 1; SCLS ← SCLA;
FOR J ← 1 STEP 1 UNTIL 10 DO
BEGIN COMMENT B ← 1/SQRT(BSAV);
ARRBLT(B[0,0,0], ROG[0,0,0], 2*K*(SIZE+1));
EXPB ← EXPROG; SCLB ← SCLROG;
ARRBLT(ASAV[0,0,0], ROG[0,0,0], 2*K*(SIZE+1));
EXPASAV ← EXPROG; SCLASAV ← SCLROG;
MULTIPLY(ASAV, ASAV, B, SCLASAV, SCLASAV,
SCLB, EXPASAV, EXPASAV, EXPB);
ARRBLT(B[0,0,0],BSAV[0,0,0], 2*K*(SIZE+1));
EXPB ← EXPBSAV; SCLB ← SCLBSAV;
MULTIPLY(ASAV, ASAV, B, SCLASAV, SCLASAV,
SCLB, EXPASAV, EXPASAV, EXPB);
ARRCLR(THREE);
BITON(-EXPASAV+OGDPT, THREE);
BITON(1-EXPASAV+OGDPT, THREE); COMMENT: 3;
LSUB(ASAV, THREE, ASAV, SCLASAV);
MULTIPLY(ROG, ROG, ASAV, SCLROG,
SCLROG, SCLASAV, EXPROG, EXPROG,
EXPASAV);
EXPROG ← EXPROG - 1;
END;
ARRBLT(B[0,0,0], ROG[0,0,0], 2*K*(SIZE+1));
SCLB ← SCLROG; EXPB ← EXPROG;
FOR I ← 1 STEP 1 UNTIL ITERNO DO
BEGIN
OUTPUT("B is now:",B);
ARRBLT(ASAV[0,0,0], A[0,0,0], 2*K*(SIZE+1));
EXPASAV ← EXPA; SCLASAV ← SCLA;
ARRBLT(BSAV[0,0,0], B[0,0,0], 2*K*(SIZE+1));
EXPBSAV ← EXPB; SCLBSAV ← SCLB;
IF EXPA > EXPB
THEN BEGIN
SCALE(B, B, EXPA-EXPB);
SCLB ← SCLB + EXPB - EXPA;
EXPB ← EXPA;
END
ELSE IF EXPB > EXPA
THEN BEGIN
SCALE(A, A, EXPB-EXPA);
SCLA ← SCLA + EXPA - EXPB;
EXPA ← EXPB;
END;
LADD(A,A,B,SCLA);
EXPA ← EXPA - 1;
IF EXPASAV > EXPB
THEN BEGIN
SCALE(B, B, EXPASAV-EXPB);
SCLB ← SCLB + EXPB - EXPASAV;
EXPB ← EXPASAV;
END
ELSE IF EXPB > EXPASAV
THEN BEGIN
SCALE(ASAV, ASAV, EXPB-EXPASAV);
SCLASAV ← SCLASAV + EXPASAV - EXPB;
EXPASAV ← EXPB;
END;
LSUB(B, ASAV, B, SCLB);
ARRBLT(C[0,0,0], B[0,0,0], 2*K*(SIZE+1));
SCLC ← SCLB; EXPC ← EXPB;
MULTIPLY(B, C, B, SCLB, SCLC, SCLB, EXPB, EXPC, EXPB);
EXPB ← EXPB - 2 + I;
IF EXPS > EXPB
THEN BEGIN
SCALE(B, B, EXPS-EXPB);
SCLB ← SCLB + EXPB - EXPS;
EXPB ← EXPS;
END
ELSE IF EXPB > EXPS
THEN BEGIN
SCALE(S, S, EXPB-EXPS);
SCLS ← SCLS + EXPS - EXPB;
EXPS ← EXPB;
END;
LADD(S, S, B, SCLS);
MULTIPLY(B, ASAV, BSAV, SCLB, SCLASAV, SCLBSAV,
EXPB, EXPASAV, EXPBSAV);
FOR J ← 1 STEP 1 UNTIL 10 DO
BEGIN COMMENT: BSAV ← 1/SQRT(B);
ARRBLT(BSAV[0,0,0], ROG[0,0,0], 2*K*(SIZE+1));
SCLBSAV ← SCLROG; EXPBSAV ← EXPROG;
ARRBLT(ASAV[0,0,0], ROG[0,0,0], 2*K*(SIZE+1));
SCLASAV ← SCLROG; EXPASAV ← EXPROG;
MULTIPLY(ASAV, ASAV, BSAV, SCLASAV, SCLASAV,
SCLBSAV, EXPASAV, EXPASAV, EXPBSAV);
ARRBLT(BSAV[0,0,0],B[0,0,0], 2*K*(SIZE+1));
EXPBSAV ← EXPB; SCLBSAV ← SCLB;
MULTIPLY(ASAV, ASAV, BSAV, SCLASAV, SCLASAV,
SCLBSAV, EXPASAV, EXPASAV, EXPBSAV);
ARRCLR(THREE);
BITON(-EXPASAV+OGDPT, THREE);
BITON(1-EXPASAV+OGDPT, THREE); COMMENT: 3;
LSUB(ASAV, THREE, ASAV, SCLASAV);
MULTIPLY(ROG, ROG, ASAV, SCLROG,
SCLROG, SCLASAV, EXPROG, EXPROG,
EXPASAV);
EXPROG ← EXPROG - 1;
END;
ARRBLT(BSAV[0,0,0], ROG[0,0,0], 2*K*(SIZE+1));
SCLBSAV ← SCLROG; EXPBSAV ← EXPROG;
FOR J ← 1 STEP 1 UNTIL 10 DO
BEGIN COMMENT: B ← 1/BSAV;
ARRBLT(B[0,0,0], RCG[0,0,0], 2*K*(SIZE+1));
SCLB ← SCLRCG; EXPB ← EXPRCG;
ARRBLT(ASAV[0,0,0], BSAV[0,0,0], 2*K*(SIZE+1));
SCLASAV ← SCLBSAV; EXPASAV ← EXPBSAV;
MULTIPLY(ASAV, ASAV, B, SCLASAV, SCLASAV,
SCLB, EXPASAV, EXPASAV, EXPB);
ARRCLR(THREE);
BITON(1-EXPASAV+OGDPT, THREE); COMMENT: 2;
LSUB(ASAV, THREE, ASAV, SCLASAV);
MULTIPLY(RCG, RCG, ASAV, SCLRCG,
SCLRCG, SCLASAV, EXPRCG, EXPRCG,
EXPASAV);
END;
ARRBLT(B[0,0,0], RCG[0,0,0], 2*K*(SIZE+1));
SCLB ← SCLRCG; EXPB ← EXPRCG;
END;
OUTPUT("Final B is:",B);
MULTIPLY(B, A, B, SCLB, SCLA, SCLB, EXPB, EXPA, EXPB);
EXPB ← EXPB + 1;
ARRCLR(THREE);
BITON(-EXPS+OGDPT, THREE); COMMENT: 1;
LSUB(S, THREE, S, SCLS);
FOR J ← 1 STEP 1 UNTIL 10 DO
BEGIN COMMENT A ← 1/S;
ARRBLT(A[0,0,0], RCG[0,0,0], 2*K*(SIZE+1));
SCLA ← SCLRCG; EXPA ← EXPRCG;
ARRBLT(ASAV[0,0,0], S[0,0,0], 2*K*(SIZE+1));
SCLASAV ← SCLS; EXPASAV ← EXPS;
MULTIPLY(ASAV, ASAV, A, SCLASAV, SCLASAV,
SCLA, EXPASAV, EXPASAV, EXPA);
ARRCLR(THREE);
BITON(1-EXPASAV+OGDPT, THREE); COMMENT: 2;
LSUB(ASAV, THREE, ASAV, SCLASAV);
MULTIPLY(RCG, RCG, ASAV, SCLRCG,
SCLRCG, SCLASAV, EXPRCG, EXPRCG,
EXPASAV);
END;
ARRBLT(A[0,0,0], RCG[0,0,0], 2*K*(SIZE+1));
SCLA ← SCLRCG; EXPA ← EXPRCG;
MULTIPLY(B, B, A, SCLB, SCLB, SCLA, EXPB, EXPB, EXPA);
OUTPUT("THE VALUE OF PI IS:",B);
END;
END;
END;
END;
END "MAIN";